
************************************************************************************************
************************************************************************************************

*This .do file simply redoes the .do file "5a - Structural_Gain_Math.do", but in a bootstrap loop to get standard errors.

*NOTE: This file take a LONG time to run (roughly 1 month on my admittedly poor server setup)

************************************************************************************************
************************************************************************************************

*Get data ready for bootstrapping
clear all
use "/data_analysis/NC_RD_Retake/student_mb_2009to2017.dta"
set more off
set seed 55555

*No need for post-2012 data
drop if year>2012
*No grade 8 
drop if grade==8
*Deal with extends
replace smathscal=. if nc_extend_math==1
replace sreadscal=. if nc_extend_read==1
drop nc_extend_math nc_extend_math_retake nc_extend_read nc_extend_read_retake
*Need current or subsequent scores in math
drop if smathscal==.
drop if Fsmathscal==.

*Now let me draw the retest gain
gen retest_gain=math_retake-math

*Create a standardized retest gain (for each grade-year combination)
gen math_retake_stan=.
gen math_stan=.
gen retest_gain_stan=.
foreach g of numlist 3(1)7{
foreach t of numlist 2009(1)2012{
su math if grade==`g' & year==`t'
replace math_retake_stan=(math_retake-r(mean))/r(sd) if grade==`g' & year==`t'
replace math_stan=(math-r(mean))/r(sd) if grade==`g' & year==`t'
replace retest_gain_stan=math_retake_stan-math_stan if grade==`g' & year==`t'
}
}
compress
save "/data_analysis/NC_RD_Retake/very_temp.dta", replace

***Now with optimal choices, graph the resulting real and predicted data
clear all
set seed 55555
use "/data_analysis/NC_RD_Retake/very_temp.dta"
*Drop uneeded vars
drop mastid year grade lea schlcode schoolid retested_m math_retake read running_read retested_r read_retake sex lep swd eds aig_math aig_read repeating repeat_next_year ethnic charter Fsmathscal F2smathscal F3smathscal F4smathscal lag_sreadscal sreadscal Fsreadscal F2sreadscal F3sreadscal F4sreadscal math_reg_date math_retest_date read_reg_date read_retest_date

*Create a 'grid' to discretize the simulated score in 0.1\sigma increments
*Max and min of grade
su math_stan if running_math==0.5
local max=r(max)
su math_stan if running_math==-10.5
local min=r(min)

foreach b of numlist 1(1)14{
local bin`b'_min=(-0.7-`b'*0.1)-0.05
local bin`b'_max=(-0.7-`b'*0.1)+0.05
local bin`b'=(-0.7-`b'*0.1)
}
display `bin1_min', `bin1_max', `bin1'
display `bin14_min', `bin14_max', `bin14'

*Drop if missing retest and in retest range
drop if retest_gain_stan==. & math_stan>`bin14_min' & math_stan<`bin1_max'

*All Grade average reliability: 0.914
display 1*(1-0.914)^0.5

local sd=(0.914)^0.5
set obs 2500000
gen ability=rnormal(0,`sd')
su ability
display r(sd)^2
local sim_var=r(sd)^2

*Make var exactly 0.914
replace ability=((0.914/`sim_var')^0.5)*ability
su ability
display r(sd)^2

gen abs_ability=abs(ability)

*Drop anything unecessary for loop
drop math running_math lag_smathscal smathscal retest_gain math_retake_stan
qui compress
save "/data_analysis/NC_RD_Retake/temp_math_bootstrap.dta", replace




clear all
use "/data_analysis/NC_RD_Retake/temp_math_bootstrap.dta"
set seed 55555

capture program drop myprog
program define myprog, rclass
*Define the bins
tempfile one
save `one'
foreach b of numlist 1(1)14{
local bin`b'_min=(-0.7-`b'*0.1)-0.05
local bin`b'_max=(-0.7-`b'*0.1)+0.05
local bin`b'=(-0.7-`b'*0.1)
}
*Now loop through the theta_0s to find optimal
foreach t of numlist 0.1(0.001)0.25{
preserve
display "Theta 0 is `t'"
*Parameters
local theta_0=`t'

qui gen sd=.
qui gen min=.
*Here are the theta_1 guesses
foreach n of numlist 0.05(0.001)0.25{
local obs=`n'*10000
local theta_1=`n'
qui gen var=(`theta_0' + `theta_1'*(abs_ability))^2
qui gen sem=(var)^0.5
qui gen test_score_sim=ability+rnormal(0,sem)
qui su test_score_sim
qui replace sd=abs(r(sd)-1) if _n==`obs'
qui replace min=`n' if _n==`obs'
qui drop var sem test_score_sim
}
qui egen min_sd=min(sd)
qui su min if sd==min_sd

*Generate theta 1
local theta_1=r(mean)
qui drop min_sd sd min

qui gen var=(`theta_0' + `theta_1'*(abs_ability))^2
qui gen sem=(var)^0.5
qui gen test_score_sim=ability+rnormal(0,sem)
qui su test_score_sim
local temp=r(sd)
display "Ensure test score is normal: `temp'"

qui gen sim_retest_gain=ability-test_score_sim

*Discretize simulated test score
foreach n of numlist 1(1)14{
qui replace test_score_sim=`bin`n'' if test_score_sim>`bin`n'_min' & test_score_sim<`bin`n'_max'
}

*Cutoff
qui gen real_ind=(math_stan>`bin14_min' & math_stan<`bin1_max')
qui gen sim_ind=(test_score_sim>`bin14_min' & test_score_sim<`bin1_max')

qui keep if real_ind==1 | sim_ind==1

qui expand 2, gen(g)
qui drop if real_ind==0 & g==0
qui drop if real_ind==1 & g==1

qui replace retest_gain_stan=. if g==1
qui replace sim_retest_gain=. if g==0

*drop missing values (only occurs in the real data)
qui drop if retest_gain_stan==. & g==0

qui replace sim_ind=g

qui gen r_gain=retest_gain_stan if g==0
qui replace r_gain=sim_retest_gain if g==1

qui gen test=math_stan if g==0
qui replace test=test_score_sim if g==1

*Discretize real test score
foreach n of numlist 1(1)14{
qui replace test=`bin`n'' if test>`bin`n'_min' & test<`bin`n'_max' & g==0
}

qui gen number=1 if g==0
qui collapse (sum) number (mean) retest_gain_stan sim_retest_gain, by(test)

*Find test gain that minimizes MSE
qui set obs 350
qui gen min=.
qui gen gain=.
foreach gain of numlist 0.04(0.001)0.24{
local n=`gain'*1000
qui gen SST=(retest_gain_stan-sim_retest_gain-`gain')^2
qui display "number is `gain'"
qui su SST
qui replace min=r(mean) if _n==`n'
qui replace gain=`gain' if _n==`n'
qui drop SST
}

qui egen min_min=min(min)
qui su gain if min==min_min
qui local gain=r(mean)
qui su min if min==min_min
qui local min=r(mean)

display "Gain is `gain' and MSE is `min' and (theta0, theta1) is (`t',`theta_1')"
qui gen theta0=`t'
qui gen theta1=`theta_1'
qui gen struc_gain=`gain'
qui gen Error=`min'
qui gen var=`temp'
qui keep if _n==1
local z=`t'*10000
qui compress
qui save "/data_analysis/NC_RD_Retake/extreme_temp_`z'.dta", replace
restore
}
drop _all
foreach t of numlist 0.1(0.001)0.25{
local z=`t'*10000
append using "/data_analysis/NC_RD_Retake/extreme_temp_`z'.dta"
erase "/data_analysis/NC_RD_Retake/extreme_temp_`z'.dta"
}
drop if var>1.001
drop if var<0.999
egen min_Error=min(Error)
su theta0 if Error==min_Error, meanonly
return scalar theta0 = r(mean)
su theta1 if Error==min_Error, meanonly
return scalar theta1 = r(mean)
su struc_gain if Error==min_Error, meanonly
return scalar gain = r(mean)
su var if Error==min_Error, meanonly
return scalar var = r(mean)
drop _all
use `one'
end
display "Start: April 25 at 10:30"
bootstrap t0 = r(theta0) t1 = r(theta1) g = r(gain) test = r(var), reps(99): myprog

